Brief abstract/introduction/motivation. State what the chapter is about in 1-2 paragraphs. Then, have an introduction video:
Prerequisites
We can mine past changes.
REPO = 'https://github.com/uds-se/debuggingbook.git'
# REPO = '..'
class ChangeCounter:
def __init__(self, repo, filter=None, log=False, **kwargs):
self.repo = repo
self.log = log
if filter is None:
filter = lambda m: True
self.filter = filter
self.changes = {}
self.messages = {}
self.sizes = {}
self.hashes = set()
self.mine(**kwargs)
class ChangeCounter(ChangeCounter):
def mine(self, **kwargs):
miner = RepositoryMining(self.repo, **kwargs)
for commit in miner.traverse_commits():
for m in commit.modifications:
m.hash = commit.hash
m.committer = commit.committer
m.committer_date = commit.committer_date
m.msg = commit.msg
if self.filter(m):
self.update_stats(m)
class ChangeCounter(ChangeCounter):
def update_stats(self, m):
if not m.new_path:
return
node = tuple(m.new_path.split('/'))
if m.hash not in self.hashes:
self.hashes.add(m.hash)
self.update_size(node, len(m.source_code) if m.source_code else 0)
self.update_changes(node, m.msg)
self.update_elems(node, m)
def update_size(self, node, size):
self.sizes[node] = size
def update_changes(self, node, msg):
self.changes.setdefault(node, 0)
self.messages.setdefault(node, [])
self.changes[node] += 1
self.messages[node].append(msg)
def update_elems(self, node, m):
pass
with Timer() as t:
change_counter = ChangeCounter(REPO)
t.elapsed_time()
84.04873938701348
list(change_counter.changes.keys())[:10]
[('README.md',),
('Chapters.makefile',),
('.gitignore',),
('full_notebooks', 'index.ipynb~'),
('binder', 'README.md'),
('notebooks', '99_Appendices.ipynb'),
('docs', 'beta', 'code', '99_Appendices.py'),
('beta', 'html', 'PICS'),
('docs', 'beta', 'html', '99_Appendices.html'),
('html', 'custom.css')]
change_counter.changes[('Chapters.makefile',)]
22
change_counter.messages[('Chapters.makefile',)]
['Initial import', 'First notebooks', 'New: make booktitle, authors configurable in Chapters.makefile', 'Doc fix', 'Doc fix', 'New: Tracer', 'Fix: "Tracer" is ready', 'New chapter structure', 'New: added Debugger to chapters', 'New: depend on shared files, not links', 'Update', 'New: ClassDiagram', 'New chapter', "Renamed 'Invariants' to 'Assertions'", 'Added Time Travel Debugger project', 'Updated synopsis', 'New: ChangeDebugger', 'New: Project 2', 'New: Slicer', 'Moved Slicing before Statistical Debugging', 'New: leaders for missing parts', 'New: Tracking (very early stage)']
change_counter.sizes[('Chapters.makefile',)]
3019
Now, visualize treemap (notebooks/) and give it
class ChangeCounter(ChangeCounter):
def map_node_sizes(self):
# Default: use log scale
return {node: math.log(self.sizes[node]) if self.sizes[node] else 0
for node in self.sizes}
# Alternative: use sqrt size
return {node: math.sqrt(self.sizes[node]) for node in self.sizes}
# Alternative: use absolute size
return self.sizes
def map_node_color(self, node):
if node and node in self.changes:
return self.changes[node]
return None
def map_node_text(self, node):
if node and node in self.changes:
return self.changes[node]
return None
def map_hoverinfo(self):
return 'label+text'
def map_colorscale(self):
return 'YlOrRd'
def map(self):
treemap = ep.Treemap(
self.map_node_sizes(),
text=self.map_node_text,
hoverinfo=self.map_hoverinfo(),
marker_colors=self.map_node_color,
marker_colorscale=self.map_colorscale(),
root_label=self.repo,
branchvalues='total'
)
fig = go.Figure(treemap)
fig.update_layout(margin=dict(l=0, r=0, t=30, b=0))
return fig
change_counter = ChangeCounter(REPO)
change_counter.map()
class FixCounter(ChangeCounter):
def __init__(self, repo, **kwargs):
super().__init__(repo, filter=self.is_fix, **kwargs)
def is_fix(self, m):
return m.msg.startswith("Fix:") and not "show output" in m.msg
class FixCounter(ChangeCounter):
def map_node_text(self, node):
if node and node in self.messages:
return "<br>".join(self.messages[node])
return ""
def map_hoverinfo(self):
return 'label'
fix_counter = FixCounter(REPO)
fix_counter.map()
Our aim: get tuples that are go beyond just files.
magic.from_buffer('''
#include <stdio.h>
int main(int argc, char *argv[]) {
printf("Hello, world!\n")
}
''')
'C source, ASCII text'
DELIMITERS = [
(
# Python
re.compile(r'^Python.*'),
# Beginning of element
re.compile(r'^(async\s+)?(def|class)\s+(?P<name>\w+)\W.*'),
# End of element
re.compile(r'^[^#\s]')
),
(
# Jupyter Notebooks
re.compile(r'^(JSON|exported SGML|Jupyter).*'),
re.compile(r'^\s+"(async\s+)?(def|class)\s+(?P<name>\w+)\W'),
re.compile(r'^(\s+"[^#\s\\]|\s+\])')
),
(
# C source code
re.compile(r'^C source.*'),
re.compile(r'^[^\s].*\s+(?P<name>\w+)\s*[({].*'),
re.compile(r'^[}]')
),
(
# Catchall (like C; should work for any language using { } as delimiters)
re.compile(r'^.*'),
re.compile(r'^[^\s].*\s+(?P<name>\w+)\s*[({].*'),
re.compile(r'^[}]')
)
]
def rxdelim(s):
tp = magic.from_buffer(s)
for rxtp, rxbegin, rxend in DELIMITERS:
if rxtp.match(tp):
return rxbegin, rxend
return None, None
def elem_mapping(s, log=False):
rxbegin, rxend = rxdelim(s)
if rxbegin is None:
return []
mapping = [None]
current_elem = None
lineno = 0
for line in s.split('\n'):
lineno += 1
match = rxbegin.match(line)
if match:
current_elem = match.group('name')
elif rxend.match(line):
current_elem = None
mapping.append(current_elem)
if log:
print(f"{lineno:3} {current_elem}\t{line}")
return mapping
some_c_source = """
#include <stdio.h>
int foo(int x) {
return x;
}
struct bar {
int x, y;
}
int main(int argc, char *argv[]) {
return foo(argc);
}
"""
some_c_mapping = elem_mapping(some_c_source, log=True)
1 None
2 None #include <stdio.h>
3 None
4 foo int foo(int x) {
5 foo return x;
6 None }
7 None
8 bar struct bar {
9 bar int x, y;
10 None }
11 None
12 main int main(int argc, char *argv[]) {
13 main return foo(argc);
14 None }
15 None
16 None
some_python_source = """
def foo(x):
return x
class bar(blue):
x = 25
def f(x):
return 26
def main(argc):
return foo(argc)
"""
some_python_mapping = elem_mapping(some_python_source, log=True)
1 None 2 foo def foo(x): 3 foo return x 4 foo 5 bar class bar(blue): 6 bar x = 25 7 bar def f(x): 8 bar return 26 9 bar 10 main def main(argc): 11 main return foo(argc) 12 main 13 main
# some_jupyter_source = open("Slicer.ipynb").read()
# some_jupyter_mapping = elem_mapping(some_jupyter_source, log=False)
class FineChangeCounter(ChangeCounter):
def changed_elems(self, mapping, start, length=0):
elems = set()
for line in range(start, start + length + 1):
if line < len(mapping) and mapping[line]:
elems.add(mapping[line])
return elems
def elem_size(self, elem, mapping, source):
source_lines = [''] + source.split('\n')
size = 0
for line_no in range(len(mapping)):
if mapping[line_no] == elem:
size += len(source_lines[line_no])
return size
fine_change_counter = FineChangeCounter(REPO)
assert fine_change_counter.changed_elems(some_python_mapping, 4) == {'foo'}
assert fine_change_counter.changed_elems(some_python_mapping, 4, 1) == {'foo', 'bar'}
assert fine_change_counter.changed_elems(some_python_mapping, 10, 2) == {'main'}
class FineChangeCounter(FineChangeCounter):
def update_elems(self, node, m):
old_source = m.source_code_before if m.source_code_before else ""
new_source = m.source_code if m.source_code else ""
patches = diff(old_source, new_source)
old_mapping = elem_mapping(old_source)
new_mapping = elem_mapping(new_source)
elems = set()
for patch in patches:
old_start_line = patch.start1 + 1
new_start_line = patch.start2 + 1
for (op, data) in patch.diffs:
data_length = data.count('\n')
if op == diff_match_patch.DIFF_INSERT:
elems |= self.changed_elems(old_mapping, old_start_line)
elems |= self.changed_elems(new_mapping, new_start_line,
data_length)
elif op == diff_match_patch.DIFF_DELETE:
elems |= self.changed_elems(old_mapping, old_start_line,
data_length)
elems |= self.changed_elems(new_mapping, new_start_line)
old_start_line += data_length
new_start_line += data_length
for elem in elems:
elem_node = node + (elem,)
self.update_size(elem_node,
self.elem_size(elem, new_mapping, new_source))
self.update_changes(elem_node, m.msg)
with Timer() as t:
fine_change_counter = FineChangeCounter(REPO)
t.elapsed_time()
836.9161813980318
fine_change_counter.map()
For those only interested in using the code in this chapter (without wanting to know how it works), give an example. This will be copied to the beginning of the chapter (before the first section) as text with rendered input and output.
# ignore
from ClassDiagram import display_class_hierarchy
# ignore
display_class_hierarchy([FineChangeCounter, FixCounter],
project='debuggingbook')
Link to subsequent chapters (notebooks) here, as in:
Cite relevant works in the literature and put them into context, as in:
The idea of ensuring that each expansion in the grammar is used at least once goes back to Burkhardt \cite{Burkhardt1967}, to be later rediscovered by Paul Purdom \cite{Purdom1972}.
Close the chapter with a few exercises such that people have things to do. To make the solutions hidden (to be revealed by the user), have them start with
**Solution.**
Your solution can then extend up to the next title (i.e., any markdown cell starting with #).
Running make metadata will automatically add metadata to the cells such that the cells will be hidden by default, and can be uncovered by the user. The button will be introduced above the solution.
Text of the exercise
# Some code that is part of the exercise
pass
Some more text for the exercise
Text of the exercise